Run download_data.Rmd and percentage_of_regional_richness.Rmd First!
city_data
fetch_city_data_for <- function(pool_name, include_city_name = F, include_pool_size = T) {
results_filename <- paste(paste(pool_name, 'city', 'richness', 'intercept', sep = "_"), "csv", sep = ".")
results <- read_csv(results_filename)
joined <- left_join(city_data, results)
pool_size_col_name <- paste(pool_name, 'pool', 'size', sep = "_")
required_columns <- c("population_growth", "rainfall_monthly_min", "rainfall_annual_average", "rainfall_monthly_max", "temperature_annual_average", "temperature_monthly_min", "temperature_monthly_max", "happiness_negative_effect", "happiness_positive_effect", "happiness_future_life", "number_of_biomes", "realm", "biome_name", "region_20km_includes_estuary", "region_50km_includes_estuary", "region_100km_includes_estuary", "city_includes_estuary", "region_20km_average_pop_density", "region_50km_average_pop_density", "region_100km_average_pop_density", "city_max_pop_density", "city_average_pop_density", "mean_population_exposure_topm2_5_2019", "region_20km_cultivated", "region_20km_urban", "region_50km_cultivated", "region_50km_urban", "region_100km_cultivated", "region_100km_urban", "region_20km_elevation_delta", "region_20km_mean_elevation", "region_50km_elevation_delta", "region_50km_mean_elevation", "region_100km_elevation_delta", "region_100km_mean_elevation", "city_elevation_delta", "city_mean_elevation", "urbanPshrubs", "permanent_water", "open_forest", "herbaceous_wetland", "herbaceous_vegetation", "cultivated", "closed_forest", "share_of_population_within_400m_of_open_space", "percentage_urban_area_as_streets", "percentage_urban_area_as_open_public_spaces_and_streets", "percentage_urban_area_as_open_public_spaces", "city_gdp_per_population", "city_ndvi", "city_ssm", "city_susm", "region_20km_ndvi", "region_20km_ssm", "region_20km_susm", "region_50km_ndvi", "region_50km_ssm", "region_50km_susm", "region_100km_ndvi", "region_100km_ssm", "region_100km_susm", "city_percentage_protected", "region_20km_percentage_protected", "region_50km_percentage_protected", "region_100km_percentage_protected")
if (include_pool_size) {
required_columns <- append(c(pool_size_col_name), required_columns)
}
if (include_city_name) {
required_columns <- append(c("name"), required_columns)
}
required_columns <- append(c("response"), required_columns)
joined[,required_columns]
}
merlin_city_data <- fetch_city_data_for('merlin')
merlin_city_data
library(randomForest)
randomForest 4.6-14
Type rfNews() to see new features/changes/bug fixes.
library(reshape2)
library(rpart)
library(ggplot2)
Attaching package: ‘ggplot2’
The following object is masked from ‘package:randomForest’:
margin
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──
✓ tibble 3.1.2 ✓ dplyr 1.0.7
✓ tidyr 1.1.3 ✓ stringr 1.4.0
✓ readr 1.4.0 ✓ forcats 0.5.1
✓ purrr 0.3.4
── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
x dplyr::combine() masks randomForest::combine()
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
x ggplot2::margin() masks randomForest::margin()
library(multcomp)
Loading required package: mvtnorm
Loading required package: survival
Attaching package: ‘survival’
The following object is masked from ‘package:boot’:
aml
Loading required package: TH.data
Loading required package: MASS
Attaching package: ‘MASS’
The following object is masked from ‘package:dplyr’:
select
Attaching package: ‘TH.data’
The following object is masked from ‘package:MASS’:
geyser
library(car)
Loading required package: carData
Registered S3 method overwritten by 'data.table':
method from
print.data.table
Registered S3 methods overwritten by 'car':
method from
influence.merMod lme4
cooks.distance.influence.merMod lme4
dfbeta.influence.merMod lme4
dfbetas.influence.merMod lme4
Attaching package: ‘car’
The following object is masked from ‘package:dplyr’:
recode
The following object is masked from ‘package:purrr’:
some
The following object is masked from ‘package:boot’:
logit
merlin_city_data_fixed <- rfImpute(response ~ ., merlin_city_data)
merlin_city_data_fixed
source('./random_forest_selection_functions.R')
scale_parameter_name <- function(scale, postscript) {
paste('region', paste(scale, 'km', sep = ''), postscript, sep = '_')
}
scale_parameters <- function(postscript) {
c(scale_parameter_name(20, postscript), scale_parameter_name(50, postscript), scale_parameter_name(100, postscript))
}
scales_parameters_without <- function(scale_to_exclude, postscript) {
scales <- scale_parameters(postscript)
scales[scales != scale_parameter_name(scale_to_exclude, postscript)]
}
select_scales <- function(urban, cultivated, elevation_delta, mean_elevation, average_pop_density, includes_estuary, ssm, susm, ndvi, percentage_protected) {
append(
append(
append(
append(
scales_parameters_without(scale_to_exclude = urban, postscript = 'urban'),
scales_parameters_without(scale_to_exclude = cultivated, postscript = 'cultivated')
),
append(
scales_parameters_without(scale_to_exclude = elevation_delta, postscript = 'elevation_delta'),
scales_parameters_without(scale_to_exclude = mean_elevation, postscript = 'mean_elevation')
)
),
append(
append(
scales_parameters_without(scale_to_exclude = average_pop_density, postscript = 'average_pop_density'),
scales_parameters_without(scale_to_exclude = includes_estuary, postscript = 'includes_estuary')
),
append(
scales_parameters_without(scale_to_exclude = ssm, postscript = 'ssm'),
scales_parameters_without(scale_to_exclude = susm, postscript = 'susm')
)
)
),
append(
scales_parameters_without(scale_to_exclude = ndvi, postscript = 'ndvi'),
scales_parameters_without(scale_to_exclude = percentage_protected, postscript = 'percentage_protected')
)
)
}
select_scales(urban = 20, cultivated = 100, elevation_delta = 20, mean_elevation = 100, average_pop_density = NA, includes_estuary = NA, ssm = 20, susm = 20, ndvi = 100, percentage_protected = NA)
select_scales(urban = , cultivated = , elevation_delta = , mean_elevation = , average_pop_density = , includes_estuary = , ssm = , susm = , ndvi =, percentage_protected = )
select_variables_from_random_forest(merlin_city_data_fixed)
exclude_merlin <- !names(merlin_city_data_fixed) %in% select_scales(urban = 20, cultivated = 100, elevation_delta = 50, mean_elevation = 20, average_pop_density = 50, includes_estuary = NA, ssm = 100, susm = 50, ndvi = 20, percentage_protected = 50)
merlin_city_data_fixed_single_scale <- merlin_city_data_fixed[,exclude_merlin]
merlin_city_data_fixed_single_scale
select_variables_from_random_forest(merlin_city_data_fixed_single_scale)
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size")])
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name")])
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm")])
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm")])
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average")])
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min")])
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta")])
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta", "rainfall_monthly_min")])
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta", "rainfall_monthly_min", "permanent_water")])
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta", "rainfall_monthly_min", "permanent_water", "region_50km_susm")])
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta", "rainfall_monthly_min", "permanent_water", "region_50km_susm", "region_20km_ndvi")])
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta", "rainfall_monthly_min", "permanent_water", "region_50km_susm", "region_20km_ndvi", "region_20km_urban")])
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta", "rainfall_monthly_min", "permanent_water", "region_50km_susm", "region_20km_ndvi", "region_20km_urban", "shrubs")])
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta", "rainfall_monthly_min", "permanent_water", "region_50km_susm", "region_20km_ndvi", "region_20km_urban", "shrubs", "city_gdp_per_population")])
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta", "rainfall_monthly_min", "permanent_water", "region_50km_susm", "region_20km_ndvi", "region_20km_urban", "shrubs", "city_gdp_per_population", "happiness_positive_effect")])
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta", "rainfall_monthly_min", "permanent_water", "region_50km_susm", "region_20km_ndvi", "region_20km_urban", "shrubs", "city_gdp_per_population", "happiness_positive_effect", "city_percentage_protected")])
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta", "rainfall_monthly_min", "permanent_water", "region_50km_susm", "region_20km_ndvi", "region_20km_urban", "shrubs", "city_gdp_per_population", "happiness_positive_effect", "city_percentage_protected", "region_100km_cultivated")])
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta", "rainfall_monthly_min", "permanent_water", "region_50km_susm", "region_20km_ndvi", "region_20km_urban", "shrubs", "city_gdp_per_population", "happiness_positive_effect", "city_percentage_protected", "region_100km_cultivated", "share_of_population_within_400m_of_open_space")])
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta", "rainfall_monthly_min", "permanent_water", "region_50km_susm", "region_20km_ndvi", "region_20km_urban", "shrubs", "city_gdp_per_population", "happiness_positive_effect", "city_percentage_protected", "region_100km_cultivated", "share_of_population_within_400m_of_open_space", "rainfall_monthly_max")])
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta", "rainfall_monthly_min", "permanent_water", "region_50km_susm", "region_20km_ndvi", "region_20km_urban", "shrubs", "city_gdp_per_population", "happiness_positive_effect", "city_percentage_protected", "region_100km_cultivated", "share_of_population_within_400m_of_open_space", "rainfall_monthly_max", "city_ndvi")])
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta", "rainfall_monthly_min", "permanent_water", "region_50km_susm", "region_20km_ndvi", "region_20km_urban", "shrubs", "city_gdp_per_population", "happiness_positive_effect", "city_percentage_protected", "region_100km_cultivated", "share_of_population_within_400m_of_open_space", "rainfall_monthly_max", "city_ndvi", "temperature_monthly_max")])
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta", "rainfall_monthly_min", "permanent_water", "region_50km_susm", "region_20km_ndvi", "region_20km_urban", "shrubs", "city_gdp_per_population", "happiness_positive_effect", "city_percentage_protected", "region_100km_cultivated", "share_of_population_within_400m_of_open_space", "rainfall_monthly_max", "city_ndvi", "temperature_monthly_max", "city_average_pop_density")])
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta", "rainfall_monthly_min", "permanent_water", "region_50km_susm", "region_20km_ndvi", "region_20km_urban", "shrubs", "city_gdp_per_population", "happiness_positive_effect", "city_percentage_protected", "region_100km_cultivated", "share_of_population_within_400m_of_open_space", "rainfall_monthly_max", "city_ndvi", "temperature_monthly_max", "city_average_pop_density", "region_50km_average_pop_density")])
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta", "rainfall_monthly_min", "permanent_water", "region_50km_susm", "region_20km_ndvi", "region_20km_urban", "shrubs", "city_gdp_per_population", "happiness_positive_effect", "city_percentage_protected", "region_100km_cultivated", "share_of_population_within_400m_of_open_space", "rainfall_monthly_max", "city_ndvi", "temperature_monthly_max", "city_average_pop_density", "region_50km_average_pop_density", "rainfall_annual_average")])
“merlin_pool_size”, “biome_name”, “realm”
birdlife_city_data <- fetch_city_data_for('birdlife')
birdlife_city_data
birdlife_city_data_fixed <- rfImpute(response ~ ., birdlife_city_data)
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 2.317 36.68 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 2.381 37.69 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 2.343 37.08 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 2.234 35.37 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 2.353 37.24 |
birdlife_city_data_fixed <- rfImpute(response ~ ., birdlife_city_data)
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 2.41 38.15 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 2.438 38.60 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 2.299 36.39 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 2.348 37.16 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 2.158 34.16 |
birdlife_city_data_fixed
select_variables_from_random_forest(birdlife_city_data_fixed)
exclude_birdlife <- !names(birdlife_city_data_fixed) %in% select_scales(urban = 100, cultivated = 100, elevation_delta = 20, mean_elevation = 100, average_pop_density = 20, includes_estuary = NA, ssm = 50, susm = 100, ndvi = 100, percentage_protected = 100)
birdlife_city_data_fixed_single_scale <- birdlife_city_data_fixed[,exclude_birdlife]
birdlife_city_data_fixed_single_scale
select_variables_from_random_forest(birdlife_city_data_fixed_single_scale)
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth")])
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm")])
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size")])
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name")])
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated")])
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi")])
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min")])
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces")])
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min")])
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "region_100km_susm")])
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "region_100km_susm", "region_20km_average_pop_density")])
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "region_100km_susm", "region_20km_average_pop_density", "city_ssm")])
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "region_100km_susm", "region_20km_average_pop_density", "city_ssm", "permanent_water")])
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "region_100km_susm", "region_20km_average_pop_density", "city_ssm", "permanent_water", "rainfall_monthly_max")])
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "region_100km_susm", "region_20km_average_pop_density", "city_ssm", "permanent_water", "rainfall_monthly_max", "region_100km_urban")])
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "region_100km_susm", "region_20km_average_pop_density", "city_ssm", "permanent_water", "rainfall_monthly_max", "region_100km_urban", "temperature_annual_average")])
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "region_100km_susm", "region_20km_average_pop_density", "city_ssm", "permanent_water", "rainfall_monthly_max", "region_100km_urban", "temperature_annual_average", "percentage_urban_area_as_open_public_spaces_and_streets")])
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "region_100km_susm", "region_20km_average_pop_density", "city_ssm", "permanent_water", "rainfall_monthly_max", "region_100km_urban", "temperature_annual_average", "percentage_urban_area_as_open_public_spaces_and_streets", "region_20km_elevation_delta")])
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "region_100km_susm", "region_20km_average_pop_density", "city_ssm", "permanent_water", "rainfall_monthly_max", "region_100km_urban", "temperature_annual_average", "percentage_urban_area_as_open_public_spaces_and_streets", "region_20km_elevation_delta", "share_of_population_within_400m_of_open_space")])
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "region_100km_susm", "region_20km_average_pop_density", "city_ssm", "permanent_water", "rainfall_monthly_max", "region_100km_urban", "temperature_annual_average", "percentage_urban_area_as_open_public_spaces_and_streets", "region_20km_elevation_delta", "share_of_population_within_400m_of_open_space", "shrubs")])
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "region_100km_susm", "region_20km_average_pop_density", "city_ssm", "permanent_water", "rainfall_monthly_max", "region_100km_urban", "temperature_annual_average", "percentage_urban_area_as_open_public_spaces_and_streets", "region_20km_elevation_delta", "share_of_population_within_400m_of_open_space", "shrubs", "mean_population_exposure_to_pm2_5_2019")])
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "region_100km_susm", "region_20km_average_pop_density", "city_ssm", "permanent_water", "rainfall_monthly_max", "region_100km_urban", "temperature_annual_average", "percentage_urban_area_as_open_public_spaces_and_streets", "region_20km_elevation_delta", "share_of_population_within_400m_of_open_space", "shrubs", "mean_population_exposure_to_pm2_5_2019", "city_average_pop_density")])
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "region_100km_susm", "region_20km_average_pop_density", "city_ssm", "permanent_water", "rainfall_monthly_max", "region_100km_urban", "temperature_annual_average", "percentage_urban_area_as_open_public_spaces_and_streets", "region_20km_elevation_delta", "share_of_population_within_400m_of_open_space", "shrubs", "mean_population_exposure_to_pm2_5_2019", "city_average_pop_density", "percentage_urban_area_as_streets")])
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "region_100km_susm", "region_20km_average_pop_density", "city_ssm", "permanent_water", "rainfall_monthly_max", "region_100km_urban", "temperature_annual_average", "percentage_urban_area_as_open_public_spaces_and_streets", "region_20km_elevation_delta", "share_of_population_within_400m_of_open_space", "shrubs", "mean_population_exposure_to_pm2_5_2019", "city_average_pop_density", "percentage_urban_area_as_streets", "rainfall_annual_average")])
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "region_100km_susm", "region_20km_average_pop_density", "city_ssm", "permanent_water", "rainfall_monthly_max", "region_100km_urban", "temperature_annual_average", "percentage_urban_area_as_open_public_spaces_and_streets", "region_20km_elevation_delta", "share_of_population_within_400m_of_open_space", "shrubs", "mean_population_exposure_to_pm2_5_2019", "city_average_pop_density", "percentage_urban_area_as_streets", "rainfall_annual_average", "city_susm")])
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "region_100km_susm", "region_20km_average_pop_density", "city_ssm", "permanent_water", "rainfall_monthly_max", "region_100km_urban", "temperature_annual_average", "percentage_urban_area_as_open_public_spaces_and_streets", "region_20km_elevation_delta", "share_of_population_within_400m_of_open_space", "shrubs", "mean_population_exposure_to_pm2_5_2019", "city_average_pop_density", "percentage_urban_area_as_streets", "rainfall_annual_average", "city_susm", "region_100km_ndvi")])
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "region_100km_susm", "region_20km_average_pop_density", "city_ssm", "permanent_water", "rainfall_monthly_max", "region_100km_urban", "temperature_annual_average", "percentage_urban_area_as_open_public_spaces_and_streets", "region_20km_elevation_delta", "share_of_population_within_400m_of_open_space", "shrubs", "mean_population_exposure_to_pm2_5_2019", "city_average_pop_density", "percentage_urban_area_as_streets", "rainfall_annual_average", "city_susm", "region_100km_ndvi", "happiness_future_life")])
“population_growth”, “region_50km_ssm”, “birdlife_pool_size”
| So…. |
|---|
| Merlin: “merlin_pool_size”, “biome_name”, “realm” Birdlife: “population_growth”, “region_50km_ssm”, “birdlife_pool_size” |
library(boot)
test_model(birdlife_city_data_fixed_no_boreal, response ~ birdlife_pool_size + realm)
[1] "R2 0.215771844466201"
[1] "CV Delta 5.38032952583311" "CV Delta 5.37866865996081"
| Use cross validation and dropping terms to find best model |
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ merlin_pool_size + population_growth + region_50km_ssm + biome_name + realm, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 14.51828
– Can we drop one?
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ merlin_pool_size + population_growth + region_50km_ssm + biome_name, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 13.8059
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ merlin_pool_size + population_growth + region_50km_ssm + realm, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 13.46984
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ merlin_pool_size + population_growth + biome_name + realm, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 14.39671
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ merlin_pool_size + region_50km_ssm + biome_name + realm, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 14.30384
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_50km_ssm + biome_name + realm, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 20.17595
– drop populaion_growth (14.30384)
– can we drop another?
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ merlin_pool_size + region_50km_ssm + biome_name, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 13.56925
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ merlin_pool_size + region_50km_ssm + realm, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 13.24412
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ merlin_pool_size + biome_name + realm, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 14.20889
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_50km_ssm + biome_name + realm, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 20.17595
– drop ssm (14.20889)
– can we drop another?
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ merlin_pool_size + biome_name, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 13.38282
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ merlin_pool_size + realm, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 13.10131
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ biome_name + realm, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 20.56763
– drop biome (13.10131)
select_variables_from_random_forest(either_city_data_fixed_single_scale)
[1] "either_pool_size" "region_50km_ssm" "population_growth"
[4] "region_100km_cultivated" "realm" "region_20km_average_pop_density"
[7] "region_100km_susm" "city_ssm" "temperature_monthly_min"
[10] "shrubs" "region_50km_elevation_delta" "biome_name"
[13] "permanent_water" "rainfall_monthly_min" "share_of_population_within_400m_of_open_space"
[16] "percentage_urban_area_as_open_public_spaces" "mean_population_exposure_to_pm2_5_2019" "temperature_annual_average"
[19] "city_average_pop_density" "percentage_urban_area_as_open_public_spaces_and_streets" "happiness_positive_effect"
[22] "city_mean_elevation" "region_20km_ndvi" "city_ndvi"
[25] "rainfall_annual_average" "city_susm" "region_50km_mean_elevation"
[28] "temperature_monthly_max" "open_forest" "happiness_negative_effect"
[31] "herbaceous_vegetation" "closed_forest" "percentage_urban_area_as_streets"
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ realm, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.48513
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ merlin_pool_size * realm, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 13.85502
| – best model with merlin is pool size + realm (CV error 13.10131) |
summary(glm(data = merlin_city_data_fixed, formula = response ~ merlin_pool_size + realm))
Call:
glm(formula = response ~ merlin_pool_size + realm, data = merlin_city_data_fixed)
Deviance Residuals:
Min 1Q Median 3Q Max
-8.5046 -1.6225 -0.5191 1.4001 16.3762
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.155064 0.927938 6.633 8.03e-10 ***
merlin_pool_size -0.028445 0.003408 -8.346 9.02e-14 ***
realmNearctic 3.089566 1.035703 2.983 0.00341 **
realmAustralasia -0.304872 2.107524 -0.145 0.88520
realmIndomalayan 1.888475 0.823167 2.294 0.02339 *
realmNeotropic 3.355053 0.997078 3.365 0.00101 **
realmAfrotropic 1.602668 1.215952 1.318 0.18981
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for gaussian family taken to be 12.36947)
Null deviance: 2469.6 on 136 degrees of freedom
Residual deviance: 1608.0 on 130 degrees of freedom
AIC: 742.19
Number of Fisher Scoring iterations: 2
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ birdlife_pool_size + population_growth + region_50km_ssm + biome_name + realm, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 5.817509
– can we drop a variable?
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ birdlife_pool_size + population_growth + region_50km_ssm + biome_name, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.1237
test_model(either_city_data_fixed, response ~ either_pool_size)
[1] "R2 0.200828561945557"
[1] "CV Delta 4.17185408588075" "CV Delta 4.17145621339379"
test_model(either_city_data_fixed, response ~ either_pool_size + region_50km_ssm)
[1] "R2 0.218556419678468"
[1] "CV Delta 4.1385005044194" "CV Delta 4.13789124054347"
test_model(either_city_data_fixed, response ~ either_pool_size + region_50km_ssm + population_growth)
[1] "R2 0.218651209733749"
[1] "CV Delta 4.24526922992079" "CV Delta 4.24423824005575"
test_model(either_city_data_fixed, response ~ either_pool_size + region_50km_ssm + population_growth + region_100km_cultivated)
[1] "R2 0.240163336907313"
[1] "CV Delta 4.1925470002567" "CV Delta 4.19130237140134"
– drop biome (5.492536)
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ birdlife_pool_size + population_growth + region_50km_ssm, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 5.686036
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ birdlife_pool_size + population_growth + realm, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 5.499406
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ birdlife_pool_size + region_50km_ssm + realm, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 5.38609
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ population_growth + region_50km_ssm + realm, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.690224
– drop population growth (5.38609)
cor(either_city_data_fixed$residuals, either_city_data_fixed$response)
[1] 0.8072575
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ birdlife_pool_size + realm, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 5.38033
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ region_50km_ssm + realm, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.637624
– drop ssm (5.38033)
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ birdlife_pool_size, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 5.613765
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ realm, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.746644
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ birdlife_pool_size * realm, data = birdlife_city_data_fixed_no_boreal))$delta[1]
| – so best model with birdlife is pool size + realm |
birdlife_city_data_fixed$realm <- factor(birdlife_city_data_fixed$realm, levels = c("Palearctic", "Nearctic", "Australasia", "Indomalayan", "Neotropic", "Afrotropic"))
summary(glm(data = birdlife_city_data_fixed, formula = response ~ birdlife_pool_size + realm))
Call:
glm(formula = response ~ birdlife_pool_size + realm, data = birdlife_city_data_fixed)
Deviance Residuals:
Min 1Q Median 3Q Max
-5.9219 -1.2809 -0.3423 0.8298 9.5036
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.16482 0.64778 4.886 2.98e-06 ***
birdlife_pool_size -0.01493 0.00252 -5.926 2.62e-08 ***
realmNearctic 0.58805 0.63743 0.923 0.35796
realmAustralasia 1.04651 1.38301 0.757 0.45060
realmIndomalayan 1.87907 0.60496 3.106 0.00233 **
realmNeotropic 2.26051 0.70591 3.202 0.00171 **
realmAfrotropic 2.54804 0.89206 2.856 0.00499 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for gaussian family taken to be 5.241433)
Null deviance: 865.45 on 136 degrees of freedom
Residual deviance: 681.39 on 130 degrees of freedom
AIC: 624.56
Number of Fisher Scoring iterations: 2
ggplot(merlin_city_data_fixed_no_boreal, aes(x = merlin_pool_size, y = response)) + geom_point() + geom_smooth(method = "glm", se = F) + facet_wrap(~ realm)
`geom_smooth()` using formula 'y ~ x'
ggplot(birdlife_city_data_fixed_no_boreal, aes(x = birdlife_pool_size, y = response)) + geom_point() + geom_smooth(method = "glm", se = F) + facet_wrap(~ realm)
`geom_smooth()` using formula 'y ~ x'
ggplot(birdlife_city_data_fixed_no_boreal, aes(x = birdlife_pool_size, y = response)) +
geom_point(size=1) +
geom_smooth(method = "glm", se = F) +
geom_text(aes(label = name), data = birdlife_city_data_fixed_no_boreal[c(16, 53, 123),], size = 3, position = "dodge", vjust = "inward", hjust = "inward", color = "red", angle=-15) +
geom_point(data = birdlife_city_data_fixed_no_boreal[c(16, 53, 123),], color = "red") +
facet_wrap(~ realm) +
theme_bw() +
ylab("City Random Effect Intercept") + xlab("Regional Pool Size") + labs(title = "Birdlife")
`geom_smooth()` using formula 'y ~ x'
Warning: Width not defined. Set with `position_dodge(width = ?)`
ggsave("city_effect_richness_birdlife.jpg")
Saving 7.29 x 4.51 in image
`geom_smooth()` using formula 'y ~ x'
Warning: Width not defined. Set with `position_dodge(width = ?)`
merlin.fit <- glm(data = merlin_city_data_fixed_no_boreal, formula = response ~ merlin_pool_size + realm)
summary(merlin.fit)
Call:
glm(formula = response ~ merlin_pool_size + realm, data = merlin_city_data_fixed_no_boreal)
Deviance Residuals:
Min 1Q Median 3Q Max
-8.5389 -1.7132 -0.5164 1.3898 16.2335
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 7.823312 1.406157 5.564 1.46e-07 ***
merlin_pool_size -0.028685 0.003401 -8.434 5.79e-14 ***
realmAustralasia -1.923665 2.294340 -0.838 0.403
realmIndomalayan 0.288224 1.193798 0.241 0.810
realmNearctic 1.499468 1.327678 1.129 0.261
realmNeotropic 1.767199 1.293308 1.366 0.174
realmPalearctic -1.490884 1.214536 -1.228 0.222
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for gaussian family taken to be 12.28537)
Null deviance: 2458.9 on 135 degrees of freedom
Residual deviance: 1584.8 on 129 degrees of freedom
AIC: 735.91
Number of Fisher Scoring iterations: 2
with(summary(merlin.fit), 1 - deviance/null.deviance)
[1] 0.3554797
plot(merlin.fit)
ggplot(merlin_city_data_fixed_no_boreal, aes(x = merlin_pool_size, y = response)) +
geom_point(size = 1) +
geom_smooth(method = "glm", se = F) +
geom_text(aes(label = name), data = merlin_city_data_fixed_no_boreal[c(7, 24, 77),], size = 3, position = "dodge", vjust = "inward", hjust = "inward", color = "red", angle=-15) +
geom_point(data = merlin_city_data_fixed_no_boreal[c(7, 24, 77),], color = "red") +
facet_wrap(~ realm) +
theme_bw() +
ylab("City Random Effect Intercept") + xlab("Regional Pool Size") + labs(title = "eBird")
`geom_smooth()` using formula 'y ~ x'
Warning: Width not defined. Set with `position_dodge(width = ?)`
ggsave("city_effect_richness_merlin.jpg")
Saving 7.29 x 4.51 in image
`geom_smooth()` using formula 'y ~ x'
Warning: Width not defined. Set with `position_dodge(width = ?)`
merlin_city_data_fixed_no_boreal$residuals <- resid(merlin.fit)
ggplot(merlin_city_data_fixed_no_boreal, aes(y = response, x = residuals)) +
geom_smooth(method = "lm", se = F) +
geom_point(aes(color = realm)) +
geom_text(aes(label = name), data = merlin_city_data_fixed_no_boreal[c(7, 24, 77),], size = 4, position = "dodge", vjust = "inward", hjust = "inward") +
labs(title = "Merlin", subtitle = paste("Correlation", cor(merlin_city_data_fixed_no_boreal$residuals, merlin_city_data_fixed_no_boreal$response))) +
theme_bw()
`geom_smooth()` using formula 'y ~ x'
Warning: Width not defined. Set with `position_dodge(width = ?)`
birdlife_city_data_fixed_no_boreal$residuals <- resid(birdlife.fit)
ggplot(birdlife_city_data_fixed_no_boreal, aes(y = response, x = residuals)) +
geom_smooth(method = "lm", se = F, alpha = 0.5) +
geom_point(aes(color = realm)) +
geom_text(aes(label = name), data = birdlife_city_data_fixed_no_boreal[c(16, 53, 124),], size = 4, position = "dodge", vjust = "inward", hjust = "inward") +
labs(title = "Birdlife", subtitle = paste("Correlation", cor(birdlife_city_data_fixed_no_boreal$residuals, birdlife_city_data_fixed_no_boreal$response))) +
theme_bw()
`geom_smooth()` using formula 'y ~ x'
Warning: Width not defined. Set with `position_dodge(width = ?)`
birdlife.aov <- aov(birdlife_city_data_fixed_no_boreal$residuals_of_fit ~ birdlife_city_data_fixed_no_boreal$realm)
summary(birdlife.aov)
Df Sum Sq Mean Sq F value Pr(>F)
birdlife_city_data_fixed_no_boreal$realm 5 0.16 0.0315 0.022 1
Residuals 130 184.77 1.4213
merlin_city_data_fixed_no_boreal$residuals_of_fit <- resid(lm(response ~ residuals, merlin_city_data_fixed_no_boreal))
ggplot(merlin_city_data_fixed_no_boreal, aes(x = realm, y = residuals_of_fit)) + geom_boxplot()
merlin.aov <- aov(merlin_city_data_fixed_no_boreal$residuals_of_fit ~ merlin_city_data_fixed_no_boreal$realm)
summary(merlin.aov)
Df Sum Sq Mean Sq F value Pr(>F)
merlin_city_data_fixed_no_boreal$realm 5 0.2 0.040 0.006 1
Residuals 130 873.9 6.722
ggplot(merlin_city_data_fixed_no_boreal, aes(residuals)) + geom_histogram(binwidth = 1)
ggplot(birdlife_city_data_fixed_no_boreal, aes(residuals)) + geom_histogram(binwidth = 1)
shapiro.test(birdlife_city_data_fixed_no_boreal$response)
Shapiro-Wilk normality test
data: birdlife_city_data_fixed_no_boreal$response
W = 0.93733, p-value = 8.842e-06
shapiro.test(birdlife_city_data_fixed_no_boreal$residuals)
Shapiro-Wilk normality test
data: birdlife_city_data_fixed_no_boreal$residuals
W = 0.94253, p-value = 2.095e-05
shapiro.test(birdlife_city_data_fixed_no_boreal$residuals_of_fit)
Shapiro-Wilk normality test
data: birdlife_city_data_fixed_no_boreal$residuals_of_fit
W = 0.90395, p-value = 7.391e-08
shapiro.test(merlin_city_data_fixed_no_boreal$response)
Shapiro-Wilk normality test
data: merlin_city_data_fixed_no_boreal$response
W = 0.95308, p-value = 0.0001366
shapiro.test(merlin_city_data_fixed_no_boreal$residuals)
Shapiro-Wilk normality test
data: merlin_city_data_fixed_no_boreal$residuals
W = 0.93318, p-value = 4.568e-06
shapiro.test(merlin_city_data_fixed_no_boreal$residuals_of_fit)
Shapiro-Wilk normality test
data: merlin_city_data_fixed_no_boreal$residuals_of_fit
W = 0.94707, p-value = 4.596e-05
Variances of groups are NOT significantly different:
bartlett.test(merlin_city_data_fixed_no_boreal$residuals, merlin_city_data_fixed_no_boreal$realm)
Bartlett test of homogeneity of variances
data: merlin_city_data_fixed_no_boreal$residuals and merlin_city_data_fixed_no_boreal$realm
Bartlett's K-squared = 12.245, df = 5, p-value = 0.03159
leveneTest(merlin_city_data_fixed_no_boreal$residuals, merlin_city_data_fixed_no_boreal$realm)
Levene's Test for Homogeneity of Variance (center = median)
Df F value Pr(>F)
group 5 1.22 0.3033
130
Variances of groups are NOT significantly different:
bartlett.test(birdlife_city_data_fixed_no_boreal$residuals, birdlife_city_data_fixed_no_boreal$realm)
Bartlett test of homogeneity of variances
data: birdlife_city_data_fixed_no_boreal$residuals and birdlife_city_data_fixed_no_boreal$realm
Bartlett's K-squared = 16.504, df = 5, p-value = 0.005544
leveneTest(birdlife_city_data_fixed_no_boreal$residuals, birdlife_city_data_fixed_no_boreal$realm)
Levene's Test for Homogeneity of Variance (center = median)
Df F value Pr(>F)
group 5 1.4422 0.2134
130
Variances of groups are significantly different:
bartlett.test(merlin_city_data_fixed_no_boreal$residuals_of_fit, merlin_city_data_fixed_no_boreal$realm)
Bartlett test of homogeneity of variances
data: merlin_city_data_fixed_no_boreal$residuals_of_fit and merlin_city_data_fixed_no_boreal$realm
Bartlett's K-squared = 63.781, df = 5, p-value = 2.006e-12
leveneTest(merlin_city_data_fixed_no_boreal$residuals_of_fit, merlin_city_data_fixed_no_boreal$realm)
Levene's Test for Homogeneity of Variance (center = median)
Df F value Pr(>F)
group 5 7.1996 5.587e-06 ***
130
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Variances of groups are significantly different:
bartlett.test(birdlife_city_data_fixed_no_boreal$residuals_of_fit, birdlife_city_data_fixed_no_boreal$realm)
Bartlett test of homogeneity of variances
data: birdlife_city_data_fixed_no_boreal$residuals_of_fit and birdlife_city_data_fixed_no_boreal$realm
Bartlett's K-squared = 77.857, df = 5, p-value = 2.355e-15
leveneTest(birdlife_city_data_fixed_no_boreal$residuals_of_fit, birdlife_city_data_fixed_no_boreal$realm)
Levene's Test for Homogeneity of Variance (center = median)
Df F value Pr(>F)
group 5 9.9859 4.199e-08 ***
130
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
| Check AIC |
AIC(
glm(data = merlin_city_data_fixed, formula = response ~ merlin_pool_size + population_growth + region_50km_ssm + biome_name + realm),
glm(data = merlin_city_data_fixed, formula = response ~ merlin_pool_size + realm)
)
AIC(
glm(data = birdlife_city_data_fixed, formula = response ~ birdlife_pool_size + population_growth + region_50km_ssm + biome_name + realm),
glm(data = birdlife_city_data_fixed, formula = response ~ birdlife_pool_size + realm)
)